library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
# Quick overview of the prestige data: row/column counts and per-column summary
prestige %>% 
  skim()
── Data Summary ────────────────────────
                           Values    
Name                       Piped data
Number of rows             102       
Number of columns          6         
_______________________              
Column type frequency:               
  factor                   1         
  numeric                  5         
________________________             
Group variables            None      

Drop NAs and remove ‘census’ variable

Feature engineering / variable transformation

# Feature engineering: log-transform the positively skewed numerics.
# 0.1 is added to `women` before logging to avoid log(0) = -Inf
# (presumably `women` contains zeros — verify against the data);
# `income` is logged directly.
prestige_log <- prestige %>% 
  mutate(ln_women = log(0.1 + women),
         ln_income = log(income))

# Re-inspect after transformation (98 rows x 7 columns per the output below)
skim(prestige_log)
── Data Summary ────────────────────────
                           Values      
Name                       prestige_log
Number of rows             98          
Number of columns          7           
_______________________                
Column type frequency:                 
  factor                   1           
  numeric                  6           
________________________               
Group variables            None        
# Pairwise plot matrix with the response (`prestige`) moved to the first
# column so its relationship with every predictor appears in the first
# row/column. Points are coloured by occupation type; alpha = 0.5 adds
# transparency to reduce overplotting.
prestige_log %>% 
  select(prestige, everything()) %>% 
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [=>--------------------------------------------------------------------------------------------------------]  2% est: 0s 
 plot: [1,2] [===>------------------------------------------------------------------------------------------------------]  4% est: 3s 
 plot: [1,3] [=====>----------------------------------------------------------------------------------------------------]  6% est: 2s 
 plot: [1,4] [========>-------------------------------------------------------------------------------------------------]  8% est: 4s 
 plot: [1,5] [==========>-----------------------------------------------------------------------------------------------] 10% est: 3s 
 plot: [1,6] [============>---------------------------------------------------------------------------------------------] 12% est: 3s 
 plot: [1,7] [==============>-------------------------------------------------------------------------------------------] 14% est: 3s 
 plot: [2,1] [================>-----------------------------------------------------------------------------------------] 16% est: 3s 
 plot: [2,2] [==================>---------------------------------------------------------------------------------------] 18% est: 3s 
 plot: [2,3] [=====================>------------------------------------------------------------------------------------] 20% est: 3s 
 plot: [2,4] [=======================>----------------------------------------------------------------------------------] 22% est: 3s 
 plot: [2,5] [=========================>--------------------------------------------------------------------------------] 24% est: 3s 
 plot: [2,6] [===========================>------------------------------------------------------------------------------] 27% est: 3s 
 plot: [2,7] [=============================>----------------------------------------------------------------------------] 29% est: 2s 
 plot: [3,1] [===============================>--------------------------------------------------------------------------] 31% est: 2s 
 plot: [3,2] [==================================>-----------------------------------------------------------------------] 33% est: 2s 
 plot: [3,3] [====================================>---------------------------------------------------------------------] 35% est: 2s 
 plot: [3,4] [======================================>-------------------------------------------------------------------] 37% est: 2s 
 plot: [3,5] [========================================>-----------------------------------------------------------------] 39% est: 2s 
 plot: [3,6] [==========================================>---------------------------------------------------------------] 41% est: 2s 
 plot: [3,7] [============================================>-------------------------------------------------------------] 43% est: 2s 
 plot: [4,1] [===============================================>----------------------------------------------------------] 45% est: 2s 
 plot: [4,2] [=================================================>--------------------------------------------------------] 47% est: 2s 
 plot: [4,3] [===================================================>------------------------------------------------------] 49% est: 2s 
 plot: [4,4] [=====================================================>----------------------------------------------------] 51% est: 2s 
 plot: [4,5] [=======================================================>--------------------------------------------------] 53% est: 2s 
 plot: [4,6] [=========================================================>------------------------------------------------] 55% est: 1s 
 plot: [4,7] [============================================================>---------------------------------------------] 57% est: 1s 
 plot: [5,1] [==============================================================>-------------------------------------------] 59% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,2] [================================================================>-----------------------------------------] 61% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,3] [==================================================================>---------------------------------------] 63% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,4] [====================================================================>-------------------------------------] 65% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [======================================================================>-----------------------------------] 67% est: 1s 
 plot: [5,6] [=========================================================================>--------------------------------] 69% est: 1s 
 plot: [5,7] [===========================================================================>------------------------------] 71% est: 1s 
 plot: [6,1] [=============================================================================>----------------------------] 73% est: 1s 
 plot: [6,2] [===============================================================================>--------------------------] 76% est: 1s 
 plot: [6,3] [=================================================================================>------------------------] 78% est: 1s 
 plot: [6,4] [===================================================================================>----------------------] 80% est: 1s 
 plot: [6,5] [======================================================================================>-------------------] 82% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,6] [========================================================================================>-----------------] 84% est: 1s 
 plot: [6,7] [==========================================================================================>---------------] 86% est: 1s 
 plot: [7,1] [============================================================================================>-------------] 88% est: 0s 
 plot: [7,2] [==============================================================================================>-----------] 90% est: 0s 
 plot: [7,3] [================================================================================================>---------] 92% est: 0s 
 plot: [7,4] [===================================================================================================>------] 94% est: 0s 
 plot: [7,5] [=====================================================================================================>----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,6] [=======================================================================================================>--] 98% est: 0s 
 plot: [7,7] [==========================================================================================================]100% est: 0s 
                                                                                                                                      

Investigate the large outliers for income:

# Inspect the 10 highest-income occupations to understand the large outliers
prestige %>% 
  slice_max(income, n = 10)

Start building our model:

Best predictor first

# Candidate first model: simple regression of prestige on education
mod1a <- lm(prestige ~ education,
            data = prestige)

# Coefficients, R-squared and residual standard error for comparison
summary(mod1a)

Call:
lm(formula = prestige ~ education, data = prestige)

Residuals:
    Min      1Q  Median      3Q     Max 
-21.605  -6.151   0.366   6.565  17.540 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.8409     3.5285  -3.072  0.00276 ** 
education     5.3884     0.3168  17.006  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.578 on 96 degrees of freedom
Multiple R-squared:  0.7508,    Adjusted R-squared:  0.7482 
F-statistic: 289.2 on 1 and 96 DF,  p-value: < 2.2e-16

For every additional year of education, predicted prestige increases by about 5.39 points, holding all other variables constant. Education alone explains roughly 75% of the variation in prestige (R² = 0.751).

autoplot(mod1a)

All plots look good - we could accept these

# Competing first model: prestige on occupation type (categorical, 3 levels)
mod1b <- lm(prestige ~ type,
            data = prestige)

# Compare R-squared / RSE against mod1a to pick the first predictor
summary(mod1b)

Call:
lm(formula = prestige ~ type, data = prestige)

Residuals:
     Min       1Q   Median       3Q      Max 
-18.2273  -7.1773  -0.0854   6.1174  25.2565 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   35.527      1.432  24.810  < 2e-16 ***
typeprof      32.321      2.227  14.511  < 2e-16 ***
typewc         6.716      2.444   2.748  0.00718 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9.499 on 95 degrees of freedom
Multiple R-squared:  0.6976,    Adjusted R-squared:  0.6913 
F-statistic: 109.6 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod1b)

Although both models look good, mod1a is better (based on r^2 and RSE), so we would choose ‘education’ as our first predictor

now we want to see which variables explain the residuals — the variation left unexplained by the model

change the variable of interest

# Residual analysis: attach mod1a's residuals and drop the variables already
# in the model, so the pairs plot shows which remaining predictors best
# explain the leftover variation.
prestige_resid <- prestige_log %>% 
  add_residuals(mod1a) %>% 
  select(-c(prestige, education))

# Put `resid` first so the first row/column shows resid vs each candidate
prestige_resid %>% 
  select(resid, everything()) %>% 
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [==>------------------------------------------------------------------------------------------------------]  3% est: 0s 
 plot: [1,2] [=====>---------------------------------------------------------------------------------------------------]  6% est: 2s 
 plot: [1,3] [========>------------------------------------------------------------------------------------------------]  8% est: 2s 
 plot: [1,4] [===========>---------------------------------------------------------------------------------------------] 11% est: 2s 
 plot: [1,5] [==============>------------------------------------------------------------------------------------------] 14% est: 2s 
 plot: [1,6] [=================>---------------------------------------------------------------------------------------] 17% est: 2s 
 plot: [2,1] [===================>-------------------------------------------------------------------------------------] 19% est: 2s 
 plot: [2,2] [======================>----------------------------------------------------------------------------------] 22% est: 2s 
 plot: [2,3] [=========================>-------------------------------------------------------------------------------] 25% est: 2s 
 plot: [2,4] [============================>----------------------------------------------------------------------------] 28% est: 2s 
 plot: [2,5] [===============================>-------------------------------------------------------------------------] 31% est: 2s 
 plot: [2,6] [==================================>----------------------------------------------------------------------] 33% est: 2s 
 plot: [3,1] [=====================================>-------------------------------------------------------------------] 36% est: 2s 
 plot: [3,2] [========================================>----------------------------------------------------------------] 39% est: 2s 
 plot: [3,3] [===========================================>-------------------------------------------------------------] 42% est: 2s 
 plot: [3,4] [==============================================>----------------------------------------------------------] 44% est: 1s 
 plot: [3,5] [=================================================>-------------------------------------------------------] 47% est: 1s 
 plot: [3,6] [===================================================>-----------------------------------------------------] 50% est: 1s 
 plot: [4,1] [======================================================>--------------------------------------------------] 53% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,2] [=========================================================>-----------------------------------------------] 56% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,3] [============================================================>--------------------------------------------] 58% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,4] [===============================================================>-----------------------------------------] 61% est: 1s 
 plot: [4,5] [==================================================================>--------------------------------------] 64% est: 1s 
 plot: [4,6] [=====================================================================>-----------------------------------] 67% est: 1s 
 plot: [5,1] [========================================================================>--------------------------------] 69% est: 1s 
 plot: [5,2] [===========================================================================>-----------------------------] 72% est: 1s 
 plot: [5,3] [==============================================================================>--------------------------] 75% est: 1s 
 plot: [5,4] [=================================================================================>-----------------------] 78% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [====================================================================================>--------------------] 81% est: 1s 
 plot: [5,6] [=======================================================================================>-----------------] 83% est: 1s 
 plot: [6,1] [=========================================================================================>---------------] 86% est: 0s 
 plot: [6,2] [============================================================================================>------------] 89% est: 0s 
 plot: [6,3] [===============================================================================================>---------] 92% est: 0s 
 plot: [6,4] [==================================================================================================>------] 94% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,5] [=====================================================================================================>---] 97% est: 0s 
 plot: [6,6] [=========================================================================================================]100% est: 0s 
                                                                                                                                     

things to model next based on this:

add second predictor - the one that explains the most residual error

# Second predictor, candidate a: add log-income (the strongest correlate of
# the mod1a residuals)
mod2a<- lm(prestige ~ education + ln_income,
           data = prestige_log)

summary(mod2a)

Call:
lm(formula = prestige ~ education + ln_income, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.6775  -4.0506  -0.2538   4.0648  16.7992 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -101.1882    12.8490  -7.875 5.51e-12 ***
education      4.0375     0.3173  12.726  < 2e-16 ***
ln_income     12.0564     1.6719   7.211 1.33e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.932 on 95 degrees of freedom
Multiple R-squared:  0.8389,    Adjusted R-squared:  0.8356 
F-statistic: 247.4 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod2a)

# Second predictor, candidate b: raw (unlogged) income, for comparison
# against the ln_income version in mod2a
mod2b<- lm(prestige ~ education + income,
           data = prestige_log)

summary(mod2b)

Call:
lm(formula = prestige ~ education + income, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.9367  -4.8881   0.0116   4.9690  15.9280 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -7.6210352  3.1162309  -2.446   0.0163 *  
education    4.2921076  0.3360645  12.772  < 2e-16 ***
income       0.0012415  0.0002185   5.682 1.45e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.45 on 95 degrees of freedom
Multiple R-squared:  0.814, Adjusted R-squared:  0.8101 
F-statistic: 207.9 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod2b)

We can reject this model in favour of mod2a

# Second predictor, candidate c: occupation type (categorical)
mod2c<- lm(prestige ~ education + type,
           data = prestige_log)

summary(mod2c)

Call:
lm(formula = prestige ~ education + type, data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-19.410  -5.508   1.360   5.694  17.171 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.6982     5.7361  -0.470   0.6392    
education     4.5728     0.6716   6.809 9.16e-10 ***
typeprof      6.1424     4.2590   1.442   0.1526    
typewc       -5.4585     2.6907  -2.029   0.0453 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.814 on 94 degrees of freedom
Multiple R-squared:  0.7975,    Adjusted R-squared:  0.791 
F-statistic: 123.4 on 3 and 94 DF,  p-value: < 2.2e-16
autoplot(mod2c)

when ‘type’ categories have different levels of significance, either use all of them or none of them — DON’T CHERRY-PICK individual levels

We can reject this model in favour of mod2a

Results:

check for significance of categorical: ANOVA test

anova(mod1a, mod2c)
Analysis of Variance Table

Model 1: prestige ~ education
Model 2: prestige ~ education + type
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1     96 7064.4                                  
2     94 5740.0  2    1324.4 10.844 5.787e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

the means of the categorical levels are statistically different, therefore all 3 levels could be included

third predictor

# Residual analysis for the third predictor: residuals of mod2a against the
# variables not yet in the model
prestige_resid <- prestige_log %>% 
  add_residuals(mod2a) %>% 
  select(-c(prestige, education, ln_income))

# `resid` first so the first row/column shows resid vs each remaining candidate
prestige_resid %>% 
  select(resid, everything()) %>%
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [===>-----------------------------------------------------------------------------------------------------]  4% est: 0s 
 plot: [1,2] [=======>-------------------------------------------------------------------------------------------------]  8% est: 1s 
 plot: [1,3] [============>--------------------------------------------------------------------------------------------] 12% est: 1s 
 plot: [1,4] [================>----------------------------------------------------------------------------------------] 16% est: 1s 
 plot: [1,5] [====================>------------------------------------------------------------------------------------] 20% est: 1s 
 plot: [2,1] [========================>--------------------------------------------------------------------------------] 24% est: 1s 
 plot: [2,2] [============================>----------------------------------------------------------------------------] 28% est: 1s 
 plot: [2,3] [=================================>-----------------------------------------------------------------------] 32% est: 1s 
 plot: [2,4] [=====================================>-------------------------------------------------------------------] 36% est: 1s 
 plot: [2,5] [=========================================>---------------------------------------------------------------] 40% est: 1s 
 plot: [3,1] [=============================================>-----------------------------------------------------------] 44% est: 1s 
 plot: [3,2] [=================================================>-------------------------------------------------------] 48% est: 1s 
 plot: [3,3] [======================================================>--------------------------------------------------] 52% est: 1s 
 plot: [3,4] [==========================================================>----------------------------------------------] 56% est: 1s 
 plot: [3,5] [==============================================================>------------------------------------------] 60% est: 1s 
 plot: [4,1] [==================================================================>--------------------------------------] 64% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,2] [======================================================================>----------------------------------] 68% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,3] [===========================================================================>-----------------------------] 72% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,4] [===============================================================================>-------------------------] 76% est: 0s 
 plot: [4,5] [===================================================================================>---------------------] 80% est: 0s 
 plot: [5,1] [=======================================================================================>-----------------] 84% est: 0s 
 plot: [5,2] [===========================================================================================>-------------] 88% est: 0s 
 plot: [5,3] [================================================================================================>--------] 92% est: 0s 
 plot: [5,4] [====================================================================================================>----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [=========================================================================================================]100% est: 0s 
                                                                                                                                     

things to model next based on this:

# Third predictor, candidate a: add occupation type to the two-predictor model
mod3a <- lm(prestige ~ education + ln_income + type,
           data = prestige_log)

summary(mod3a)

Call:
lm(formula = prestige ~ education + ln_income + type, data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.511  -3.746   1.011   4.356  18.438 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -81.2019    13.7431  -5.909 5.63e-08 ***
education     3.2845     0.6081   5.401 5.06e-07 ***
ln_income    10.4875     1.7167   6.109 2.31e-08 ***
typeprof      6.7509     3.6185   1.866   0.0652 .  
typewc       -1.4394     2.3780  -0.605   0.5465    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.637 on 93 degrees of freedom
Multiple R-squared:  0.8555,    Adjusted R-squared:  0.8493 
F-statistic: 137.6 on 4 and 93 DF,  p-value: < 2.2e-16
autoplot(mod3a)

do an anova test on types

anova(mod2a, mod3a)
Analysis of Variance Table

Model 1: prestige ~ education + ln_income
Model 2: prestige ~ education + ln_income + type
  Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
1     95 4565.4                                
2     93 4096.3  2    469.07 5.3247 0.006465 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

the ANOVA result is significant — we can include ‘type’ in the model

also try ‘women’ as an alternative third predictor, for comparison against adding ‘type’

# Third predictor, candidate b: `women` instead of `type`, for comparison
# against mod3a
mod3b <- lm(prestige ~ education + ln_income + women,
           data = prestige_log)

summary(mod3b)

Call:
lm(formula = prestige ~ education + ln_income + women, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.8202  -4.7019   0.0696   4.2245  17.6833 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -129.16790   18.95716  -6.814 8.97e-10 ***
education      3.59404    0.38431   9.352 4.39e-15 ***
ln_income     15.60547    2.43246   6.416 5.62e-09 ***
women          0.06481    0.03270   1.982   0.0504 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.828 on 94 degrees of freedom
Multiple R-squared:  0.8454,    Adjusted R-squared:  0.8405 
F-statistic: 171.4 on 3 and 94 DF,  p-value: < 2.2e-16
autoplot(mod3b)

Interactions (between main effects)

# Interaction hunting: residuals of the 3-predictor model (mod3a), keeping
# all predictors this time so we can plot resid against them
prestige_resid <- prestige_log %>% 
  add_residuals(mod3a) %>% 
  select(-prestige)
# Conditioning plot: resid vs ln_income within slices of education, with a
# per-panel regression line. If the blue slopes differ across panels, an
# education:ln_income interaction may be present.
coplot(resid ~ ln_income | education,
       panel = function(x, y, ...) {
         points(x, y)
         abline(lm(y ~ x), col = "blue")
       },
       data = prestige_resid,
       rows = 1)

# Resid vs education with one fitted line per occupation type: diverging
# slopes suggest an education:type interaction
prestige_resid %>% 
  ggplot(aes(x = education,
             y = resid,
             colour = type)) +
  geom_point()+
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'

# Resid vs ln_income with one fitted line per occupation type: diverging
# slopes suggest a ln_income:type interaction
prestige_resid %>% 
  ggplot(aes(x = ln_income,
             y = resid,
             colour = type)) +
  geom_point()+
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'

Including the interactions

# Interaction candidate a: education x ln_income on top of the main effects
mod4a <- lm(prestige ~ education + ln_income + type + education:ln_income,
            data = prestige_log)

summary(mod4a)

Call:
lm(formula = prestige ~ education + ln_income + type + education:ln_income, 
    data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.6129  -4.1461   0.7695   4.1546  17.9453 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         -166.7630    51.0506  -3.267 0.001530 ** 
education             11.1444     4.5601   2.444 0.016438 *  
ln_income             20.2741     5.8790   3.449 0.000852 ***
typeprof               6.8626     3.5803   1.917 0.058374 .  
typewc                -2.3516     2.4103  -0.976 0.331796    
education:ln_income   -0.8892     0.5114  -1.739 0.085414 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.566 on 92 degrees of freedom
Multiple R-squared:  0.8601,    Adjusted R-squared:  0.8525 
F-statistic: 113.1 on 5 and 92 DF,  p-value: < 2.2e-16
# Interaction candidate b: education x type
mod4b <- lm(prestige ~ education + ln_income + type + education:type,
            data = prestige_log)

summary(mod4b)

Call:
lm(formula = prestige ~ education + ln_income + type + education:type, 
    data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.962  -3.797   1.041   4.092  16.503 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -81.6672    14.3681  -5.684 1.57e-07 ***
education            3.1407     0.9004   3.488 0.000751 ***
ln_income           10.6833     1.7108   6.245 1.33e-08 ***
typeprof            15.6176    14.2168   1.099 0.274871    
typewc             -30.4466    18.3465  -1.660 0.100451    
education:typeprof  -0.5801     1.2211  -0.475 0.635887    
education:typewc     2.6675     1.7551   1.520 0.132018    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.585 on 91 degrees of freedom
Multiple R-squared:  0.8608,    Adjusted R-squared:  0.8516 
F-statistic: 93.79 on 6 and 91 DF,  p-value: < 2.2e-16
# Interaction candidate c: type x ln_income
mod4c <- lm(prestige ~ education + ln_income + type + type:ln_income,
            data = prestige_log)

summary(mod4c)

Call:
lm(formula = prestige ~ education + ln_income + type + type:ln_income, 
    data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.484  -4.453   1.122   4.123  18.737 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -118.4325    20.3728  -5.813 8.97e-08 ***
education             3.2107     0.5993   5.357 6.31e-07 ***
ln_income            14.9336     2.4928   5.991 4.12e-08 ***
typeprof             82.7757    31.5059   2.627   0.0101 *  
typewc               51.3717    36.8521   1.394   0.1667    
ln_income:typeprof   -8.5690     3.5251  -2.431   0.0170 *  
ln_income:typewc     -6.1925     4.3172  -1.434   0.1549    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.491 on 91 degrees of freedom
Multiple R-squared:  0.8647,    Adjusted R-squared:  0.8558 
F-statistic: 96.96 on 6 and 91 DF,  p-value: < 2.2e-16
# Diagnostic plots for the interaction model
autoplot(mod4c)

# Nested-model F-test: does the type:ln_income interaction significantly
# improve on the main-effects model mod3a?
anova(mod3a, mod4c)
Analysis of Variance Table

Model 1: prestige ~ education + ln_income + type
Model 2: prestige ~ education + ln_income + type + type:ln_income
  Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
1     93 4096.3                              
2     91 3834.2  2    262.13 3.1107 0.04934 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Relative Importance

calc.relimp(mod3a, type = "lmg", rela = TRUE)
Response variable: prestige 
Total response variance: 292.2358 
Analysis based on 98 observations 

4 Regressors: 
Some regressors combined in groups: 
        Group  type : typeprof typewc 

 Relative importance of 3 (groups of) regressors assessed: 
 type education ln_income 
 
Proportion of variance explained by model: 85.55%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                lmg
type      0.3352553
education 0.3831467
ln_income 0.2815980

Average coefficients for different model sizes: 

             1group   2groups   3groups
education  5.388408  4.305159  3.284486
ln_income 24.619472 12.879804 10.487471
typeprof  32.321114 14.810837  6.750887
typewc     6.716206  1.013706 -1.439403
calc.relimp(mod4c, type = "lmg", rela = TRUE)
Response variable: prestige 
Total response variance: 292.2358 
Analysis based on 98 observations 

6 Regressors: 
Some regressors combined in groups: 
        Group  type : typeprof typewc 
        Group  ln_income:type : ln_income:typeprof ln_income:typewc 

 Relative importance of 4 (groups of) regressors assessed: 
 type ln_income:type education ln_income 
 
Proportion of variance explained by model: 86.47%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                      lmg
type           0.38512449
ln_income:type 0.01146495
education      0.29662098
ln_income      0.30678958

Average coefficients for different model sizes: 

                      1group   2groups   3groups   4groups
education           5.388408  4.305159  3.284486  3.210707
ln_income          24.619472 12.879804 14.634783 14.933637
typeprof           32.321114 14.810837 54.690682 82.775730
typewc              6.716206  1.013706 41.131305 51.371716
ln_income:typeprof       NaN       NaN -9.001091 -8.568986
ln_income:typewc         NaN       NaN -8.979324 -6.192503
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjYXIpCmxpYnJhcnkobW9kZWxyKQpsaWJyYXJ5KHNraW1yKQpsaWJyYXJ5KEdHYWxseSkKbGlicmFyeShnZ2ZvcnRpZnkpCmBgYAoKCmBgYHtyfQpwcmVzdGlnZSA8LSBQcmVzdGlnZQpgYGAKCgpgYGB7cn0KcHJlc3RpZ2UgJT4lIAogIHNraW0oKQpgYGAKCkRyb3AgTkFzIGFuZCByZW1vdmUgJ2NlbnN1cycgdmFyaWFibGUKCmBgYHtyfQpwcmVzdGlnZSA8LSBQcmVzdGlnZSAlPiUgCiAgZHJvcF9uYSgpICU+JSAKICBzZWxlY3QoLWNlbnN1cykKYGBgCgoKRmVhdHVyZSBlbmdpbmVlcmluZyAvIHZhcmlhYmxlIHRyYW5zZm9ybWF0aW9uCgoqIExvZ2dlZCBzb21lIHBvc2l0aXZlbHkgc2tld2VkIG51bWVyaWNzCiAgKyB3b21lbgogICsgaW5jb21lCgpgYGB7cn0KcHJlc3RpZ2VfbG9nIDwtIHByZXN0aWdlICU+JSAKICBtdXRhdGUobG5fd29tZW4gPSBsb2coMC4xICsgd29tZW4pLAogICAgICAgICBsbl9pbmNvbWUgPSBsb2coaW5jb21lKSkKCnNraW0ocHJlc3RpZ2VfbG9nKQpgYGAKCgpgYGB7cn0KcHJlc3RpZ2UgJT4lIAogIHNlbGVjdChwcmVzdGlnZSwgZXZlcnl0aGluZygpKSAlPiUgCiAgZ2dwYWlycyhhZXMoY29sb3VyID0gdHlwZSwKICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkpYQpgYGAKCgpgYGB7cn0KcHJlc3RpZ2VfbG9nICU+JSAKICBzZWxlY3QocHJlc3RpZ2UsIGV2ZXJ5dGhpbmcoKSkgJT4lIAogIGdncGFpcnMoYWVzKGNvbG91ciA9IHR5cGUsCiAgICAgICAgICAgICAgYWxwaGEgPSAwLjUpKQpgYGAKCiogQnkgbG9va2luZyBhdCB0aGUgbG9ncyB3ZSBhcmUgYWNjb3VudGluZyBmb3IgdGhlIGxhcmdlIG91dGxpZXJzCiogVGhpcyBtYWtlcyBpdCBsb29rIG1vcmUgbGlrZSBhIG5vcm1hbCBkaXN0cmlidXRpb24KKiBDb3JyZWxhdGlvbiBhbHNvIGluY3JlYXNlcyBieSAwLjA1CiogVGhlIHNjYXR0ZXIgcGxvdCBvZiBsbl9pbmNvbWUgaXMgbXVjaCBtb3JlICdub3JtYWwnIHRoYW4ganVzdCBpbmNvbWUKCgpJbnZlc3RpZ2F0ZSB0aGUgbGFyZ2Ugb3V0bGllcnMgZm9yIGluY29tZToKCmBgYHtyfQpwcmVzdGlnZSAlPiUgCiAgc2xpY2VfbWF4KGluY29tZSwgbiA9IDEwKQpgYGAKCgpTdGFydCBidWlsZGluZyBvdXIgbW9kZWw6CgpCZXN0IHByZWRpY3RvciBmaXJzdAoKYGBge3J9Cm1vZDFhIDwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uLAogICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2UpCgpzdW1tYXJ5KG1vZDFhKQpgYGAKCkZvciBldmVyeSB1bml0IGluY3JlYXNlIGluIGVkdWNhdGlvbiwgcHJlc3RpZ2UgaW5jcmVhc2VzIDUuMyBpZiBhbGwgb3RoZXIgdmFyaWFibGVzIGFyZSBoZWxkIGNvbnN0YW50LgpFZHVjYXRpb24gY2FuIGV4cGxhaW4gNzUlIG9mIHZhcmlhdGlvbiBpbiBwcmVzdGlnZQoKYGBge3J9CmF1dG9wbG90KG1vZDFhKQpgYGAKCkFsbCBwbG90cyBs
b29rIGdvb2QgLSB3ZSBjb3VsZCBhY2NlcHQgdGhlc2UKCgpgYGB7cn0KbW9kMWIgPC0gbG0ocHJlc3RpZ2UgfiB0eXBlLAogICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2UpCgpzdW1tYXJ5KG1vZDFiKQpgYGAKCiogQmx1ZS1jb2xsYXI6IDM1LjUKKiBXaGl0ZS1jb2xsYXI6IDQyLjIKKiBQcm9mZXNzaW9uYWw6IDc0LjUKKiBSU0Ugc2xpZ2h0bHkgaGlnaGVyICg5LjUpCiogJHJeMiQgc2xpZ2h0bHkgbG93ZXIgKDAuNykgCgoKYGBge3J9CmF1dG9wbG90KG1vZDFiKQpgYGAKCkFsdGhvdWdoIGJvdGggbW9kZWxzIGxvb2sgZ29vZCwgbW9kMWEgaXMgYmV0dGVyIChiYXNlZCBvbiByXjIgYW5kIFJTRSksIHNvIHdlIHdvdWxkIGNob29zZSAnZWR1Y2F0aW9uJyBhcyBvdXIgZmlyc3QgcHJlZGljdG9yCgoKbm93IHdlIHdhbnQgdG8gc2VlIHdoaWNoIHZhcmlhYmxlcyBkZXNjcmliZSB0aGUgJ3Jlc2lkdWUnIC0gdGhlIHVuZXhwbGFpbmVkIG1vZGVsIGVycm9yCgpjaGFuZ2UgdGhlIHZhcmlhYmxlIG9mIGludGVyZXN0CgpgYGB7cn0KcHJlc3RpZ2VfcmVzaWQgPC0gcHJlc3RpZ2VfbG9nICU+JSAKICBhZGRfcmVzaWR1YWxzKG1vZDFhKSAlPiUgCiAgc2VsZWN0KC1jKHByZXN0aWdlLCBlZHVjYXRpb24pKQoKcHJlc3RpZ2VfcmVzaWQgJT4lIAogIHNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSAlPiUgCiAgZ2dwYWlycyhhZXMoY29sb3VyID0gdHlwZSwKICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkpCmBgYAoKdGhpbmdzIHRvIG1vZGVsIG5leHQgYmFzZWQgb24gdGhpczoKCiogbG5faW5jb21lCiogaW5jb21lCiogdHlwZQoKYWRkIHNlY29uZCBwcmVkaWN0b3IgLSB0aGUgb25lIHRoYXQgZXhwbGFpbnMgdGhlIG1vc3QgcmVzaWR1YWwgZXJyb3IKCmBgYHtyfQptb2QyYTwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgbG5faW5jb21lLAogICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZV9sb2cpCgpzdW1tYXJ5KG1vZDJhKQoKYXV0b3Bsb3QobW9kMmEpCmBgYAoKKiAkcl4yJCBpcyB1cCB0byA4My40CiogUlNFIGlzIGRvd24gdG8gNi45CiogZWR1Y2F0aW9uIGNvZWZmaWNpZW50IGlzIHJlZHVjZWQgdG8gNCAoZnJvbSA1LjMpCiogY2FuJ3QgcmVhbGx5IGNvbXBhcmUgdGhlc2UgY29lZmZpY2llbnRzIGFzIGVkdWNhdGlvbiBpcyBtZWFzdXJlZCBpbiB5ZWFycyBhbmQgbG5faW5jb21lIGlzIGEgbG9nIG9mIGRvbGxhcnMgCgoKYGBge3J9Cm1vZDJiPC0gbG0ocHJlc3RpZ2UgfiBlZHVjYXRpb24gKyBpbmNvbWUsCiAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kMmIpCgphdXRvcGxvdChtb2QyYikKYGBgCgoqIGxlc3MgcHJlZGljdGl2ZSBvZiBhIG1vZGVsCiogZWR1Y2F0aW9uIGNvZWZmaWNlbnQgbGVzcyBlZmZlY3RlZAoqICRyXjIkIGxlc3MKKiBSRVMgaGlnaGVyCgpXZSBjYW4gKipyZWplY3QqKiB0aGlzIG1vZGVsIGluIGZhdm91ciBvZiAqKm1vZDJhKioKCgpgYGB7cn0KbW9kMmM8LSBsbShwcmVzdGln
ZSB+IGVkdWNhdGlvbiArIHR5cGUsCiAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kMmMpCgphdXRvcGxvdChtb2QyYykKYGBgCgoqIHR5cGUgbGFiZWxzIGV4cGxhaW4gKipsZXNzKiogd2hlbiBlZHVjYXRpb24gaXMgY29udHJvbGxlZAoqICRyXjIkIGlzIGRvd24KKiBSU0UgaXMgaGlnaGVyCgp3aGVuICd0eXBlJyBjYXRlZ29yaWVzIGhhdmUgZGlmZmVyZW50IGxldmVscyBvZiBzaWduaWZpY2FuY2UsIGVpdGhlciB1c2UgKiphbGwqKiBvciAqKm5vbmUqKiBvZiB0aGVtIC0gKipET04iVCBDSEVSUlkgUElDSyoqCgpXZSBjYW4gKipyZWplY3QqKiB0aGlzIG1vZGVsIGluIGZhdm91ciBvZiAqKm1vZDJhKioKClJlc3VsdHM6CgoqICoqbW9kMmEqKiBnYXZlIGJpZ2dlc3QgdXBsaWZ0IGluICRyXjIkCiogcmVzaWR1YWxzIGxvb2sgZ3JlYXQKCgpjaGVjayBmb3Igc2lnbmlmaWNhbmNlIG9mIGNhdGVnb3JpY2FsOiBBTk9WQSB0ZXN0CmBgYHtyfQphbm92YShtb2QxYSwgbW9kMmMpCmBgYAoKdGhlIG1lYW5zIG9mIHRoZSBjYXRlZ29yaWNhbCBsZXZlbHMgYXJlIHN0YXRpc3RpY2FsbHkgZGlmZmVyZW50LCB0aGVyZWZvcmUgKiphbGwgMyBsZXZlbHMqKiBjb3VsZCBiZSBpbmNsdWRlZAoKCnRoaXJkIHByZWRpY3RvcgoKYGBge3J9CnByZXN0aWdlX3Jlc2lkIDwtIHByZXN0aWdlX2xvZyAlPiUgCiAgYWRkX3Jlc2lkdWFscyhtb2QyYSkgJT4lIAogIHNlbGVjdCgtYyhwcmVzdGlnZSwgZWR1Y2F0aW9uLCBsbl9pbmNvbWUpKQoKcHJlc3RpZ2VfcmVzaWQgJT4lIAogIHNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSAlPiUKICBnZ3BhaXJzKGFlcyhjb2xvdXIgPSB0eXBlLAogICAgICAgICAgICAgIGFscGhhID0gMC41KSkKYGBgCgp0aGluZ3MgdG8gbW9kZWwgbmV4dCBiYXNlZCBvbiB0aGlzOgoKKiB0eXBlCiogd29tZW4KCmBgYHtyfQptb2QzYSA8LSBsbShwcmVzdGlnZSB+IGVkdWNhdGlvbiArIGxuX2luY29tZSArIHR5cGUsCiAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kM2EpCgphdXRvcGxvdChtb2QzYSkKYGBgCgpkbyBhbiBhbm92YSB0ZXN0IG9uIHR5cGVzCgpgYGB7cn0KYW5vdmEobW9kMmEsIG1vZDNhKQpgYGAKCnRoZSByZXN1bHRzIGFyZSBzaWduaWZpY2FudCAtIHdlIGNhbiBpbmNsdWRlCgoqKmFkZCB0eXBlIHRvIG1vZGVsKioKCmBgYHtyfQptb2QzYiA8LSBsbShwcmVzdGlnZSB+IGVkdWNhdGlvbiArIGxuX2luY29tZSArIHdvbWVuLAogICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZV9sb2cpCgpzdW1tYXJ5KG1vZDNiKQoKYXV0b3Bsb3QobW9kM2IpCgpgYGAKCgpJbnRlcmFjdGlvbnMgKGJldHdlZW4gbWFpbiBlZmZlY3RzKQoKYGBge3J9CnByZXN0aWdlX3Jlc2lkIDwtIHByZXN0aWdlX2xvZyAlPiUgCiAgYWRkX3Jlc2lkdWFscyhtb2QzYSkgJT4lIAogIHNlbGVjdCgtcHJlc3RpZ2UpCmBgYAoKYGBge3J9CmNvcGxvdChyZXNpZCB+IGxuX2luY29tZSB8IGVkdWNh
dGlvbiwKICAgICAgIHBhbmVsID0gZnVuY3Rpb24oeCwgeSwgLi4uKSB7CiAgICAgICAgIHBvaW50cyh4LCB5KQogICAgICAgICBhYmxpbmUobG0oeSB+IHgpLCBjb2wgPSAiYmx1ZSIpCiAgICAgICB9LAogICAgICAgZGF0YSA9IHByZXN0aWdlX3Jlc2lkLAogICAgICAgcm93cyA9IDEpCmBgYAoKCiogb3ZlciBwcmVkaWN0aW5nIGF0IGhpZ2hlciBpbmNvbWUKKiB1bmRlciBwcmVkaWN0aW5nIGF0IGxvd2VyIGluY29tZQoKCmBgYHtyfQpwcmVzdGlnZV9yZXNpZCAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gZWR1Y2F0aW9uLAogICAgICAgICAgICAgeSA9IHJlc2lkLAogICAgICAgICAgICAgY29sb3VyID0gdHlwZSkpICsKICBnZW9tX3BvaW50KCkrCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkKYGBgCgoqIHRoZXJlIGlzIGEgZGlmZmVyZW5jZSBpbiB0eXBlIGxldmVscwoqIG1vZGVsIGNvdWxkIGJlbmVmaXQgZnJvbSBpbmNsdWRpbmcgdGhpcyAKCgpgYGB7cn0KcHJlc3RpZ2VfcmVzaWQgJT4lIAogIGdncGxvdChhZXMoeCA9IGxuX2luY29tZSwKICAgICAgICAgICAgIHkgPSByZXNpZCwKICAgICAgICAgICAgIGNvbG91ciA9IHR5cGUpKSArCiAgZ2VvbV9wb2ludCgpKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIHNlID0gRkFMU0UpCmBgYAoKKiB0aGVyZSBpcyBhIGRpZmZlcmVuY2UgaW4gdHlwZSBsZXZlbHMKKiBtb2RlbCBjb3VsZCBiZW5lZml0IGZyb20gaW5jbHVkaW5nIHRoaXMgCgpJbmNsdWRpbmcgdGhlIGludGVyYWN0aW9ucwoKYGBge3J9Cm1vZDRhIDwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgbG5faW5jb21lICsgdHlwZSArIGVkdWNhdGlvbjpsbl9pbmNvbWUsCiAgICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZV9sb2cpCgpzdW1tYXJ5KG1vZDRhKQpgYGAKCiogZWR1Y2F0aW9uOmxuX2luY29tZSBpcyBvbmx5IGEgJy4nIHNpZ25pZmljYW5jZQogICsgbWF5YmUgZG9uJ3QgaW5jbHVkZQoKCmBgYHtyfQptb2Q0YiA8LSBsbShwcmVzdGlnZSB+IGVkdWNhdGlvbiArIGxuX2luY29tZSArIHR5cGUgKyBlZHVjYXRpb246dHlwZSwKICAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kNGIpCmBgYAoKCgpgYGB7cn0KbW9kNGMgPC0gbG0ocHJlc3RpZ2UgfiBlZHVjYXRpb24gKyBsbl9pbmNvbWUgKyB0eXBlICsgdHlwZTpsbl9pbmNvbWUsCiAgICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZV9sb2cpCgpzdW1tYXJ5KG1vZDRjKQpgYGAKCgpgYGB7cn0KYXV0b3Bsb3QobW9kNGMpCmBgYAoKCmBgYHtyfQphbm92YShtb2QzYSwgbW9kNGMpCmBgYAoKClJlbGF0aXZlIEltcG9ydGFuY2UKCmBgYHtyfQpsaWJyYXJ5KHJlbGFpbXBvKQoKY2FsYy5yZWxpbXAobW9kM2EsIHR5cGUgPSAibG1nIiwgcmVsYSA9IFRSVUUpCmBgYAoKCmBgYHtyfQpjYWxjLnJlbGltcChtb2Q0YywgdHlwZSA9ICJsbWciLCByZWxhID0gVFJVRSkKYGBgCgoKCgo=